package kube

import (
	"bytes"
	"fmt"
	"io"
	_ "sort"
	"strings"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	_ "k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/watch"

	"k8s.io/kubernetes/pkg/apis/batch"
	"k8s.io/kubernetes/pkg/apis/core"
	"k8s.io/kubernetes/pkg/kubectl/resource"
)
var (
	WatchFeedStub = &WatchFeedProto{
		WriteJobLogChunkFunc: func(JobLogChunk) error { return nil },
		WriteJobPodErrorFunc: func(JobPodError) error { return nil },
	}
)
type WatchFeed interface {
	WriteJobLogChunk(JobLogChunk) error
	WriteJobPodError(JobPodError) error
}

// WatchFeedProto is a prototype-struct helper for creating a feed with its
// callbacks specified in place at the point of creation (see the WatchFeedStub var).
type WatchFeedProto struct {
	WriteJobLogChunkFunc func(JobLogChunk) error
	WriteJobPodErrorFunc func(JobPodError) error
}

func (proto *WatchFeedProto) WriteJobLogChunk(arg JobLogChunk) error {
	return proto.WriteJobLogChunkFunc(arg)
}

func (proto *WatchFeedProto) WriteJobPodError(arg JobPodError) error {
	return proto.WriteJobPodErrorFunc(arg)
}
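
// As a rough usage sketch (hypothetical caller-side code, not part of this
// package), a feed that simply prints whatever the watcher emits can be
// assembled in place from WatchFeedProto:
//
//	feed := &WatchFeedProto{
//		WriteJobLogChunkFunc: func(chunk JobLogChunk) error {
//			for _, line := range chunk.LogLines {
//				fmt.Printf("job/%s po/%s %s: %s\n", chunk.JobName, chunk.PodName, chunk.ContainerName, line.Data)
//			}
//			return nil
//		},
//		WriteJobPodErrorFunc: func(podError JobPodError) error {
//			fmt.Printf("job/%s po/%s %s error: %s\n", podError.JobName, podError.PodName, podError.ContainerName, podError.Message)
//			return nil
//		},
//	}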
type LogLine struct {
	Timestamp string
	Data      string
}
type PodLogChunk struct {
	PodName       string
	ContainerName string
	LogLines      []LogLine
}

type PodError struct {
	Message       string
	PodName       string
	ContainerName string
}

type JobLogChunk struct {
	PodLogChunk
	JobName string
}

type JobPodError struct {
	PodError
	JobName string
}
type WatchMonitor struct {
	Kube    *Client
	Timeout time.Duration

	Namespace              string
	ResourceName           string
	InitialResourceVersion string
}
type PodWatchMonitor struct {
	WatchMonitor

	PodLogChunk chan *PodLogChunk
	PodError    chan PodError
	Error       chan error

	ContainerMonitorStates          map[string]string
	ProcessedContainerLogTimestamps map[string]time.Time

	InitContainersNames []string
	ContainersNames     []string
}
func (pod *PodWatchMonitor) FollowContainerLogs(containerName string) error {
	client, err := pod.Kube.ClientSet()
	if err != nil {
		return err
	}

	// var sinceTime *metav1.Time
	// if v, found := pod.ProcessedContainerLogTimestamps[containerName]; found {
	// 	sinceTime = &metav1.Time{v}
	// }

	req := client.Core().
		Pods(pod.Namespace).
		GetLogs(pod.ResourceName, &core.PodLogOptions{
			Container:  containerName,
			Timestamps: true,
			Follow:     true,
		})

	readCloser, err := req.Stream()
	if err != nil {
		return err
	}
	defer readCloser.Close()

	lineBuf := bytes.Buffer{}
	rawBuf := make([]byte, 4096)

	for {
		n, err := readCloser.Read(rawBuf)
		if err == io.EOF {
			break
		} else if err != nil {
			return err
		}

		chunkLines := make([]LogLine, 0)
		for i := 0; i < n; i++ {
			if rawBuf[i] == '\n' {
				lineParts := strings.SplitN(lineBuf.String(), " ", 2)
				if len(lineParts) == 2 {
					chunkLines = append(chunkLines, LogLine{Timestamp: lineParts[0], Data: lineParts[1]})
				}

				lineBuf.Reset()
				continue
			}

			lineBuf.WriteByte(rawBuf[i])
		}

		pod.PodLogChunk <- &PodLogChunk{
			PodName:       pod.ResourceName,
			ContainerName: containerName,
			LogLines:      chunkLines,
		}
	}

	return nil
}
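
// With Timestamps: true the kubelet prefixes every log line with an RFC3339
// timestamp followed by a single space, e.g. (illustrative value only):
//
//	2018-04-02T09:27:34.543012345Z some log message text
//
// which is why FollowContainerLogs splits each accumulated line on the first
// space into a Timestamp part and a Data part.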
func (pod *PodWatchMonitor) WatchContainerLogs(containerName string) error {
	for {
		switch pod.ContainerMonitorStates[containerName] {
		case "Running", "Terminated":
			return pod.FollowContainerLogs(containerName)
		case "Waiting":
		default:
		}

		time.Sleep(time.Duration(200) * time.Millisecond)
	}

	return nil
}
func (pod *PodWatchMonitor) Watch() error {
	for _, containerName := range pod.ContainersNames {
		go func(containerName string) {
			err := pod.WatchContainerLogs(containerName)
			if err != nil {
				pod.Error <- err
			}
		}(containerName)
	}

	client, err := pod.Kube.ClientSet()
	if err != nil {
		return err
	}

	watcher, err := client.Core().Pods(pod.Namespace).
		Watch(metav1.ListOptions{
			ResourceVersion: pod.InitialResourceVersion,
			Watch:           true,
			FieldSelector:   fields.OneTermEqualSelector("metadata.name", pod.ResourceName).String(),
		})
	if err != nil {
		return err
	}

	_, err = watch.Until(pod.Timeout, watcher, func(e watch.Event) (bool, error) {
		pod.Kube.Log("[DEBUG] Pod %s event: %+v", pod.ResourceName, e)

		object, ok := e.Object.(*core.Pod)
		if !ok {
			return true, fmt.Errorf("Expected %s to be a *core.Pod, got %T", pod.ResourceName, e.Object)
		}

		for _, cs := range object.Status.ContainerStatuses {
			oldState := pod.ContainerMonitorStates[cs.Name]

			if cs.State.Waiting != nil {
				pod.ContainerMonitorStates[cs.Name] = "Waiting"

				switch cs.State.Waiting.Reason {
				case "ImagePullBackOff", "ErrImagePull", "CrashLoopBackOff":
					pod.PodError <- PodError{
						ContainerName: cs.Name,
						PodName:       pod.ResourceName,
						Message:       fmt.Sprintf("%s: %s", cs.State.Waiting.Reason, cs.State.Waiting.Message),
					}
				}
			}
			if cs.State.Running != nil {
				pod.ContainerMonitorStates[cs.Name] = "Running"
			}
			if cs.State.Terminated != nil {
				pod.ContainerMonitorStates[cs.Name] = "Terminated"
			}

			if oldState != pod.ContainerMonitorStates[cs.Name] {
				pod.Kube.Log("Pod %s container %s state changed %v -> %v", pod.ResourceName, cs.Name, oldState, pod.ContainerMonitorStates[cs.Name])
			}
		}

		return false, nil
	})
	if err != nil {
		return err
	}

	return nil
}
type JobWatchMonitor struct {
	WatchMonitor

	State string

	Started     chan bool
	Succeeded   chan bool
	Error       chan error
	AddedPod    chan *PodWatchMonitor
	PodLogChunk chan *PodLogChunk
	PodError    chan PodError

	MonitoredPods []*PodWatchMonitor
}
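
// How the pieces below fit together: JobWatchMonitor.Watch follows the Job
// object itself, announces Started and, once the job is seen, spawns
// WatchPods to follow pods matching the job's selector; it signals Succeeded
// when the JobComplete condition appears. Each discovered pod gets its own
// PodWatchMonitor (announced on AddedPod), which streams log chunks and
// pod-level errors into the shared PodLogChunk, PodError and Error channels.
// watchJobTillDone drains all of these channels in a select loop and forwards
// the data to the caller-provided WatchFeed.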
func (job *JobWatchMonitor) Watch() error {
	client, err := job.Kube.ClientSet()
	if err != nil {
		return err
	}

	watcher, err := client.Batch().Jobs(job.Namespace).
		Watch(metav1.ListOptions{
			ResourceVersion: job.InitialResourceVersion,
			Watch:           true,
			FieldSelector:   fields.OneTermEqualSelector("metadata.name", job.ResourceName).String(),
		})
	if err != nil {
		return err
	}

	_, err = watch.Until(job.Timeout, watcher, func(e watch.Event) (bool, error) {
		job.Kube.Log("[DEBUG] Job %s event: %+v", job.ResourceName, e)

		switch job.State {
		case "":
			if e.Type == watch.Added {
				job.Started <- true

				oldState := job.State
				job.State = "Started"
				job.Kube.Log("[DEBUG] Job %s watcher state changed %v -> %v", job.ResourceName, oldState, job.State)

				job.Kube.Log("[DEBUG] Starting job %s pods watcher", job.ResourceName)
				go func() {
					err := job.WatchPods()
					if err != nil {
						job.Error <- err
					}
				}()
			}
		case "Started":
			object, ok := e.Object.(*batch.Job)
			if !ok {
				return true, fmt.Errorf("Expected %s to be a *batch.Job, got %T", job.ResourceName, e.Object)
			}

			for _, c := range object.Status.Conditions {
				if c.Type == batch.JobComplete && c.Status == core.ConditionTrue {
					oldState := job.State
					job.State = "Succeeded"
					job.Kube.Log("[DEBUG] Job %s watcher state changed %v -> %v", job.ResourceName, oldState, job.State)
					job.Kube.Log("%s: Jobs active: %d, jobs failed: %d, jobs succeeded: %d", job.ResourceName, object.Status.Active, object.Status.Failed, object.Status.Succeeded)

					// Signal the main watch loop that the job has completed successfully.
					job.Succeeded <- true

					return true, nil
				} else if c.Type == batch.JobFailed && c.Status == core.ConditionTrue {
					oldState := job.State
					job.State = "Failed"
					job.Kube.Log("[DEBUG] Job %s watcher state changed %v -> %v", job.ResourceName, oldState, job.State)

					return true, fmt.Errorf("Job failed: %s", c.Reason)
				}
			}
		default:
			return true, fmt.Errorf("Unknown job %s watcher state: %s", job.ResourceName, job.State)
		}

		return false, nil
	})
	if err != nil {
		return err
	}

	return nil
}
func (job *JobWatchMonitor) WatchPods() error {
	client, err := job.Kube.ClientSet()
	if err != nil {
		return err
	}

	// Fetch the job manifest to read its pod selector.
	jobManifest, err := client.Batch().
		Jobs(job.Namespace).
		Get(job.ResourceName, metav1.GetOptions{})
	if err != nil {
		return err
	}

	selector, err := metav1.LabelSelectorAsSelector(jobManifest.Spec.Selector)
	if err != nil {
		return err
	}

	podListWatcher, err := client.Core().
		Pods(job.Namespace).
		Watch(metav1.ListOptions{
			Watch:         true,
			LabelSelector: selector.String(),
		})
	if err != nil {
		return err
	}

	// TODO: filter out pods that do not belong to controller-uid=job-uid
	// TODO: calculate the timeout relative to the moment the job watch started
	_, err = watch.Until(job.Timeout, podListWatcher, func(e watch.Event) (bool, error) {
		job.Kube.Log("[DEBUG] Job %s pods list event: %+v", job.ResourceName, e)

		podObject, ok := e.Object.(*core.Pod)
		if !ok {
			return true, fmt.Errorf("Expected %s to be a *core.Pod, got %T", job.ResourceName, e.Object)
		}

		for _, pod := range job.MonitoredPods {
			if pod.ResourceName == podObject.Name {
				// Already under monitoring
				return false, nil
			}
		}

		// TODO: constructor from job & podObject
		pod := &PodWatchMonitor{
			WatchMonitor: WatchMonitor{
				Kube:                   job.Kube,
				Timeout:                job.Timeout,
				Namespace:              job.Namespace,
				ResourceName:           podObject.Name,
				InitialResourceVersion: "",
			},

			PodLogChunk: job.PodLogChunk,
			PodError:    job.PodError,
			Error:       job.Error,

			ContainerMonitorStates:          make(map[string]string),
			ProcessedContainerLogTimestamps: make(map[string]time.Time),
		}

		for _, containerConf := range podObject.Spec.InitContainers {
			pod.InitContainersNames = append(pod.InitContainersNames, containerConf.Name)
		}
		for _, containerConf := range podObject.Spec.Containers {
			pod.ContainersNames = append(pod.ContainersNames, containerConf.Name)
		}

		job.MonitoredPods = append(job.MonitoredPods, pod)

		go func() {
			err := pod.Watch()
			if err != nil {
				job.Error <- err
			}
		}()

		job.AddedPod <- pod

		return false, nil
	})
	if err != nil {
		return err
	}

	return nil
}
func (c *Client) watchJobTillDone(jobInfo *resource.Info, watchFeed WatchFeed, timeout time.Duration) error {
	if jobInfo.Mapping.GroupVersionKind.Kind != "Job" {
		return nil
	}

	// TODO: constructor
	job := &JobWatchMonitor{
		WatchMonitor: WatchMonitor{
			Kube:                   c,
			Timeout:                timeout,
			Namespace:              jobInfo.Namespace,
			ResourceName:           jobInfo.Name,
			InitialResourceVersion: jobInfo.ResourceVersion,
		},

		Started:     make(chan bool, 0),
		Succeeded:   make(chan bool, 0),
		AddedPod:    make(chan *PodWatchMonitor, 10),
		PodLogChunk: make(chan *PodLogChunk, 1000),
		PodError:    make(chan PodError, 0),
		Error:       make(chan error, 0),
	}

	c.Log("[DEBUG] Starting job %s watcher", job.ResourceName)
	go func() {
		err := job.Watch()
		if err != nil {
			job.Error <- err
		}
	}()

	for {
		select {
		case <-job.Started:
			c.Log("Job %s started", job.ResourceName)
			// TODO: watchFeed
		case <-job.Succeeded:
			return nil
		case err := <-job.Error:
			return err
		case pod := <-job.AddedPod:
			c.Log("Job %s pod %s added", job.ResourceName, pod.ResourceName)
		case podLogChunk := <-job.PodLogChunk:
			if err := watchFeed.WriteJobLogChunk(JobLogChunk{
				PodLogChunk: *podLogChunk,
				JobName:     job.ResourceName,
			}); err != nil {
				return err
			}
		case podError := <-job.PodError:
			if err := watchFeed.WriteJobPodError(JobPodError{
				JobName:  job.ResourceName,
				PodError: podError,
			}); err != nil {
				return err
			}
		}
	}
}
func (c *Client) WatchJobsTillDone(namespace string, reader io.Reader, watchFeed WatchFeed, timeout time.Duration) error {
	infos, err := c.Build(namespace, reader)
	if err != nil {
		return err
	}

	return perform(infos, func(info *resource.Info) error {
		return c.watchJobTillDone(info, watchFeed, timeout)
	})
}
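
// A hypothetical caller sketch (the client value, manifestReader and feed are
// assumptions for illustration, not part of this file): the rendered job
// manifests are passed in as a reader together with a WatchFeed and a timeout:
//
//	err := client.WatchJobsTillDone("default", manifestReader, feed, 5*time.Minute)
//	if err != nil {
//		return err
//	}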